SitemapBuilder.php

<?php

namespace Phad;

/**
 * A simple sitemap builder that writes every entry to disk as soon as it is added,
 * so memory use stays low.
 *
 * Files are created inside the directory given to the constructor. Each file is opened
 * lazily by the first addEntry() call for it and finished by close() (or by the destructor).
 */
class SitemapBuilder {

    /** Not referenced by any method of this class. */
    public $cache_dir;

    /** Directory the sitemap xml files are written to. */
    public $dir;

    /** Open file handles, keyed by sitemap file name (including `.xml`). */
    public $handles = [];

    /** sitemap handlers that modify sitemap data (not referenced by any method of this class) */
    public $handlers = [];

    /**
     * A router object that is used to parse the url patterns.
     * It must provide `decode_pattern($pattern)`, whose result is read as an array
     * with a `pattern` string and a `params` list (see fill_pattern()).
     */
    public $router;

    /**
     * A pdo instance for performing queries. Only needed for sitemap data that has an `sql` key.
     */
    public ?\PDO $pdo = null;

    /** When true, get_results() throws on a failed query instead of returning false. */
    public bool $throw_on_query_failure = false;

    /** the website to prefix all <loc> paths with, such as https://example.com
     */
    public string $host = '';

    /**
     * Remember the storage directory and create it if it does not exist yet.
     *
     * @param $storageDir directory to write your xml files to.
     * @throws \RuntimeException if the directory does not exist and cannot be created
     */
    public function __construct($storageDir){
        $this->dir = $storageDir;
        if (is_dir($this->dir)) return;

        // 0755: the owner needs the execute bit to create files inside the directory.
        // Recursive, so nested storage paths work too. The second is_dir() covers a
        // concurrent process creating the directory first.
        if (!mkdir($this->dir, 0755, true) && !is_dir($this->dir)){
            throw new \RuntimeException("Could not create sitemap directory '{$this->dir}'.");
        }
    }

    /**
     * Normalize a sitemap name to its file name.
     *
     * @param $sitemapName the name of the sitemap, with or without .xml
     * @return string the name, guaranteed to end in `.xml`
     */
    protected function xml_file_name($sitemapName){
        if (substr($sitemapName, -4) !== '.xml') $sitemapName .= '.xml';
        return $sitemapName;
    }

    /**
     * Get an existing file handle or open a new one.
     * A newly opened file is truncated and the xml declaration plus the opening
     * `<urlset>` tag are written to it.
     *
     * @param $name file name (including `.xml`) relative to the storage directory
     * @return a file handle for use with fwrite()
     * @throws \RuntimeException if the file cannot be opened for writing
     */
    protected function handle($name){
        if (isset($this->handles[$name])) return $this->handles[$name];

        $path = $this->dir.'/'.$name;
        $handle = fopen($path, 'w+');
        if ($handle === false){
            throw new \RuntimeException("Could not open sitemap file '{$path}' for writing.");
        }

        // write the opening for a sitemap xml file
        $sitemapOpen = '<?xml version="1.0" encoding="UTF-8"?>'
            ."\n"
            .'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
        fwrite($handle, $sitemapOpen);

        $this->handles[$name] = $handle;
        return $handle;
    }

    /**
     * Finish and close the open file handle of one sitemap.
     * Writes the closing `</urlset>` tag. Does nothing if the sitemap is not open.
     *
     * @param $name the name of the sitemap, with or without .xml
     */
    public function close($name){
        if (!isset($this->handles[$name])) $name = $name.'.xml';
        $handle = $this->handles[$name] ?? null;
        if ($handle === null) return;
        unset($this->handles[$name]);

        fwrite($handle, "\n</urlset>");
        fclose($handle);
    }

    /**
     * Writes a new entry to the target xml file
     *
     * Keys are written in alphabetical order. `last_mod` is written as `lastmod`.
     * Values that are loosely equal to null or blank after trimming are skipped.
     * Values are entity-escaped for xml; already escaped entities are left as they are.
     *
     * @param $sitemapName the name of the sitemap, with or without .xml
     * @param $entry `key=>value` array to write as `<key>value</key>`
     */
    public function addEntry($sitemapName, $entry){
        $h = $this->handle($this->xml_file_name($sitemapName));
        ksort($entry);

        $indent = '    ';
        fwrite($h, "\n$indent<url>");
        foreach ($entry as $key=>$value){
            // loose comparison on purpose: null, false, 0, '' and [] are all treated as "not set"
            if ($value == null || trim((string)$value) === '') continue;
            if ($key === 'last_mod') $key = 'lastmod';
            // Raw '&', '<' or '>' (common in query strings) would make the file invalid xml.
            // double_encode is off so callers that already escaped their values are not escaped twice.
            $escaped = htmlspecialchars((string)$value, ENT_XML1 | ENT_QUOTES, 'UTF-8', false);
            fwrite($h, "\n$indent    <$key>$escaped</$key>");
        }
        fwrite($h, "\n$indent</url>");
    }


    /**
     * Parse the stored xml file and return one array per `<url>` entry.
     *
     * Reads the file from disk, so the sitemap should be closed first: while it is still
     * open the closing `</urlset>` tag has not been written yet.
     *
     * @param $sitemapName the name of the sitemap, with or without .xml
     * @return array list of `tag name => trimmed inner text` arrays
     * @throws \Exception if the sitemap file does not exist or cannot be read
     */
    public function get_stored_entries($sitemapName){
        $file = $this->dir.'/'.$this->xml_file_name($sitemapName);
        if (!is_file($file)) throw new \Exception("There is no file '{$file}'. Cannot lookup entries.");
        $content = file_get_contents($file);
        if ($content === false) throw new \Exception("Could not read file '{$file}'. Cannot lookup entries.");

        $doc = new \Taeluf\PHTML($content);
        $all = [];
        foreach ($doc->xpath('//url') as $entry){
            // start fresh for every <url>, so tags of an earlier entry cannot leak into this one
            $out = [];
            foreach ($entry->children as $cn){
                if ($cn->nodeName == '#text') continue;
                $out[$cn->nodeName] = trim($cn->innerText);
            }
            $all[] = $out;
        }

        return $all;
    }

    /**
     * Finish every sitemap that is still open, so no file is left without its closing tag.
     */
    public function __destruct(){
        foreach (array_keys($this->handles) as $name){
            $this->close($name);
        }
    }


    /**
     * Build the sitemap entries described by one sitemap data array.
     *
     * Sample sitemap data:
     *
     *     [
     *         'sql' => 'SELECT slug FROM blog',
     *         'filter'=> 'ns:filter_name',
     *         'priority' => '0.8',
     *         'last_mod' => '1354',
     *         'changefreq' => 'daily',
     *         'pattern' => '/blog/{slug}',
     *     ]
     *
     * Without `sql`, a single entry is returned whose `loc` is the host plus the pattern.
     * With `sql`, one entry is returned per result row: the pattern placeholders are filled
     * from the row, and a row column with the same name as an entry key overrides the default.
     *
     * @param $sitemap_data array as shown above
     * @return array list of entries, each suitable for addEntry()
     */
    public function build_entries(array $sitemap_data){
        $base = array_merge(
            ['priority'=>null,
            'last_mod'=>null,
            'changefreq'=>null,
            ],
            $sitemap_data
        );
        // NOTE(review): the sample data uses a 'filter' key but only 'handler' is removed here,
        // so 'filter' ends up in the entry (and in the xml). Confirm which key is intended.
        unset($base['sql'], $base['handler'], $base['pattern']);

        $pattern = $this->host.($sitemap_data['pattern'] ?? '');

        // return one entry if no sql given; no router is needed for that
        if (!isset($sitemap_data['sql'])){
            $entry = $base;
            $entry['loc'] = $pattern;
            return [$entry];
        }

        $parsed = $this->parse_pattern($pattern);
        $results = $this->get_results($sitemap_data['sql']);
        // get_results() returns false for a failed query when throwing is disabled
        if (!is_array($results)) return [];

        $entries = [];
        foreach ($results as $row){
            $out = [];
            // override the default priority, last_mod, changefreq, ... with the row's value if it has one
            foreach ($base as $key=>$default){
                $out[$key] = $row[$key] ?? $default;
            }
            $out['loc'] = $this->fill_pattern($parsed, $row);
            $entries[] = $out;
        }

        return $entries;
    }

    /**
     * Run a query and return all of its rows.
     *
     * @param $sql the query to run
     * @return array|false list of associative rows, or false if the query failed
     *         and `$throw_on_query_failure` is false
     * @throws \RuntimeException if no PDO instance has been set
     * @throws \PDOException if the query failed and `$throw_on_query_failure` is true
     *         (PDO itself may also throw, depending on its error mode)
     */
    public function get_results($sql){
        $pdo = $this->pdo;
        if ($pdo === null){
            throw new \RuntimeException("Cannot run a sitemap query because no PDO instance has been set.");
        }

        $stmt = $pdo->prepare($sql);
        if ($stmt === false){
            if ($this->throw_on_query_failure){
                throw new \PDOException("Could not prepare query... ".print_r($pdo->errorInfo(), true));
            }
            return false;
        }

        if ($stmt->execute() === false){
            if ($this->throw_on_query_failure){
                throw new \PDOException("Could not execute query... ".print_r($stmt->errorInfo(), true));
            }
            return false;
        }

        return $stmt->fetchAll(\PDO::FETCH_ASSOC);
    }

    /**
     * Decode a url pattern with the router.
     *
     * @param $pattern url pattern such as `/blog/{slug}`
     * @return whatever `$router->decode_pattern()` returns
     */
    public function parse_pattern($pattern){
        return $this->router->decode_pattern($pattern);
    }

    /**
     * Replace every `{param}` placeholder of a parsed pattern with its value.
     *
     * @param $parsed array with a `pattern` string and a `params` list of placeholder names
     * @param $values `param name => value` array; a missing value is replaced by an empty string
     * @return string the filled in pattern
     */
    public function fill_pattern($parsed, array $values){
        $out = $parsed['pattern'];
        foreach ($parsed['params'] as $param){
            $out = str_replace('{'.$param.'}', (string)($values[$param] ?? ''), $out);
        }

        return $out;
    }

    /**
     * Build all entries of the given sitemap data and write them to one sitemap file.
     * If no entry is produced, no file is opened or written.
     *
     * @param $sitemap_data_list array from @see(get_sitemap_list())
     * @param $sitemap_name the name of the sitemap, with or without .xml
     * @return string sitemap file name (like sitemap.xml)
     */
    public function make_sitemap(array $sitemap_data_list, $sitemap_name='sitemap'){
        $file_name = $this->xml_file_name($sitemap_name);
        foreach ($sitemap_data_list as $sitemap_data){
            foreach ($this->build_entries($sitemap_data) as $entry){
                $this->addEntry($file_name, $entry);
            }
        }
        $this->close($file_name);

        return $file_name;
    }

    /**
     * Build all entries of the given sitemap data without writing anything to disk.
     *
     * @param $sitemap_data_list array from @see(get_sitemap_list())
     * @return array of sitemap entries
     */
    public function get_sitemap_as_array(array $sitemap_data_list){
        $all_entries = [];
        foreach ($sitemap_data_list as $sitemap_data){
            foreach ($this->build_entries($sitemap_data) as $entry){
                $all_entries[] = $entry;
            }
        }
        return $all_entries;
    }

    /**
     * Collect the sitemap data of every item.
     * Each item is loaded with `$phad->item($v, [])`; items whose `sitemap_data()` is not
     * an array are skipped.
     *
     * @param $items @see(Phad::get_all_items())
     * @param $phad a phad instance
     * @return an array to pass to @see(this::make_sitemap())
     */
    public function get_sitemap_list($items, $phad){
        $sm_list = [];
        foreach ($items as $v){
            $sm = $phad->item($v, [])->sitemap_data();
            if (!is_array($sm)) continue;
            $sm_list = array_merge($sm_list, array_values($sm));
        }
        return $sm_list;
    }
}